package com.example.demo.service;

import lombok.Data;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.downloader.HttpClientDownloader;
import us.codecraft.webmagic.model.OOSpider;
import us.codecraft.webmagic.model.annotation.ExtractBy;
import us.codecraft.webmagic.model.annotation.TargetUrl;
import us.codecraft.webmagic.proxy.Proxy;
import us.codecraft.webmagic.proxy.SimpleProxyProvider;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
 * Page model for the free proxy listing at kuaidaili.com.
 *
 * <p>Each field collects the text of every table cell on the page whose
 * {@code data-title} attribute matches the XPath in its {@code @ExtractBy}
 * annotation (presumably one column of the listing table per field).
 * Getters and setters are not written out by hand; the class is annotated
 * with Lombok's {@code @Data}.
 */
@TargetUrl(value = "http://www.kuaidaili.com/free/intr/")
@Data
public class IpSpieder {

    /** Text of the cells titled "IP". */
    @ExtractBy("//td[@data-title='IP']/text()")
    List<String> ip;

    /** Text of the cells titled "PORT". */
    @ExtractBy("//td[@data-title='PORT']/text()")
    List<String> port;

    /** Text of the cells titled "匿名度" (anonymity level). */
    @ExtractBy("//td[@data-title='匿名度']/text()")
    List<String> anonymous;

    /** Text of the cells titled "类型" (type). */
    @ExtractBy("//td[@data-title='类型']/text()")
    List<String> type;

    /** Text of the cells titled "位置" (location). */
    @ExtractBy("//td[@data-title='位置']/text()")
    List<String> position;

    /** Text of the cells titled "响应速度" (response speed). */
    @ExtractBy("//td[@data-title='响应速度']/text()")
    List<String> ResponseSpeed;

    /** Text of the cells titled "最后验证时间" (last verification time). */
    @ExtractBy("//td[@data-title='最后验证时间']/text()")
    List<String> validationTime;

    /**
     * Crawls the listing page with three threads and hands the extracted
     * model to {@code IpSpiderServicePipeline}.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // Built but never handed to the spider, so it plays no part in the crawl.
        // Calling setDownloader(proxyDownloader) on the spider before run() would attach it.
        HttpClientDownloader proxyDownloader = newProxyDownloader();

        Site crawlSite = newSite();

        // Custom result handling. A JSON-file pipeline or a console pipeline
        // could be passed here instead.
        IpSpiderServicePipeline resultPipeline = new IpSpiderServicePipeline("D:\\webmagic\\");

        Spider crawler = OOSpider.create(crawlSite, resultPipeline, IpSpieder.class);
        crawler.addUrl("http://www.kuaidaili.com/free/intr/");
        crawler.thread(3);
        crawler.run();
    }

    /** Creates a downloader configured with the single fixed proxy 221.5.80.66:3128. */
    private static HttpClientDownloader newProxyDownloader() {
        HttpClientDownloader downloader = new HttpClientDownloader();
        Proxy fixedProxy = new Proxy("221.5.80.66", 3128);
        downloader.setProxyProvider(SimpleProxyProvider.from(fixedProxy));
        return downloader;
    }

    /** Builds the site settings: cycle retry times 3, sleep time 1000, accepted status code 200 only. */
    private static Site newSite() {
        Set<Integer> acceptedStatusCodes = new HashSet<>();
        acceptedStatusCodes.add(200);

        Site site = Site.me();
        site.setCycleRetryTimes(3);
        site.setSleepTime(1000);
        site.setAcceptStatCode(acceptedStatusCodes);
        // Request headers (map-typed) can be added to the site here when needed.
        return site;
    }

}
